Fix cpufreq HW-ALL coordination handling
authorKeir Fraser <keir.fraser@citrix.com>
Tue, 14 Apr 2009 10:20:55 +0000 (11:20 +0100)
committerKeir Fraser <keir.fraser@citrix.com>
Tue, 14 Apr 2009 10:20:55 +0000 (11:20 +0100)
Currently, cpufreq HW-ALL coordination is handled the same way as SW-ALL.
However, SW-ALL incurs more IPIs, which is bad for cpuidle.
This patch implements HW-ALL coordination differently from SW-ALL, for
the sake of performance and to reduce IPIs. We also suspend/resume the
HW-ALL dbs timer around idle.

Signed-off-by: Yu, Ke <ke.yu@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Tian, Kevin <kevin.tian@intel.com>
xen/arch/x86/acpi/cpu_idle.c
xen/arch/x86/acpi/cpufreq/cpufreq.c
xen/drivers/cpufreq/cpufreq.c
xen/drivers/cpufreq/cpufreq_ondemand.c
xen/include/acpi/cpufreq/cpufreq.h

index a7ca18826e59a5c7ffaef147e36cc26e4f18f32c..856572a0896bc1a06f018ebc5869845b0f1a2a9d 100644 (file)
@@ -47,6 +47,7 @@
 #include <asm/processor.h>
 #include <public/platform.h>
 #include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
 
 /*#define DEBUG_PM_CX*/
 
@@ -195,6 +196,8 @@ static void acpi_processor_idle(void)
     int sleep_ticks = 0;
     u32 t1, t2 = 0;
 
+    cpufreq_dbs_timer_suspend();
+
     sched_tick_suspend();
     /*
      * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
@@ -214,6 +217,7 @@ static void acpi_processor_idle(void)
     {
         local_irq_enable();
         sched_tick_resume();
+        cpufreq_dbs_timer_resume();
         return;
     }
 
@@ -234,6 +238,7 @@ static void acpi_processor_idle(void)
         else
             acpi_safe_halt();
         sched_tick_resume();
+        cpufreq_dbs_timer_resume();
         return;
     }
 
@@ -341,6 +346,7 @@ static void acpi_processor_idle(void)
     default:
         local_irq_enable();
         sched_tick_resume();
+        cpufreq_dbs_timer_resume();
         return;
     }
 
@@ -352,6 +358,7 @@ static void acpi_processor_idle(void)
     }
 
     sched_tick_resume();
+    cpufreq_dbs_timer_resume();
 
     if ( cpuidle_current_governor->reflect )
         cpuidle_current_governor->reflect(power);
index cda7fb40aa28511e9fc92da5f455f9c4dba82ebd..1631a30935647ef87a81901620fcd252b7154730 100644 (file)
@@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd)
 
 static void drv_write(struct drv_cmd *cmd)
 {
-    on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
+    if ((cpus_weight(cmd->mask) ==  1) &&
+        cpu_isset(smp_processor_id(), cmd->mask))
+        do_drv_write((void *)cmd);
+    else
+        on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
 }
 
 static u32 get_cur_val(cpumask_t mask)
index efb805b01cda26592055061ab2473b423e6f8981..39cc7eba61f053e571b87fc59e6f8e74865df7ae 100644 (file)
@@ -130,7 +130,7 @@ int cpufreq_add_cpu(unsigned int cpu)
     int ret = 0;
     unsigned int firstcpu;
     unsigned int dom, domexist = 0;
-    unsigned int j;
+    unsigned int hw_all = 0;
     struct list_head *pos;
     struct cpufreq_dom *cpufreq_dom = NULL;
     struct cpufreq_policy new_policy;
@@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu)
     if (cpufreq_cpu_policy[cpu])
         return 0;
 
-    ret = cpufreq_statistic_init(cpu);
-    if (ret)
-        return ret;
+    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+        hw_all = 1;
 
     dom = perf->domain_info.domain;
 
@@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu)
         }
     }
 
-    if (domexist) {
-        /* share policy with the first cpu since on same boat */
-        firstcpu = first_cpu(cpufreq_dom->map);
-        policy = cpufreq_cpu_policy[firstcpu];
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom->map);
-        cpu_set(cpu, policy->cpus);
-
-        /* domain coordination sanity check */
-        if ((perf->domain_info.coord_type !=
-             processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
-            (perf->domain_info.num_processors !=
-             processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
-            ret = -EINVAL;
-            goto err2;
-        }
-
-        printk(KERN_EMERG"adding CPU %u\n", cpu);
-    } else {
+    if (!domexist) {
         cpufreq_dom = xmalloc(struct cpufreq_dom);
-        if (!cpufreq_dom) {
-            cpufreq_statistic_exit(cpu);
+        if (!cpufreq_dom)
             return -ENOMEM;
-        }
+
         memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
         cpufreq_dom->dom = dom;
-        cpu_set(cpu, cpufreq_dom->map);
         list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
+    } else {
+        /* domain sanity check under whatever coordination type */
+        firstcpu = first_cpu(cpufreq_dom->map);
+        if ((perf->domain_info.coord_type !=
+            processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
+            (perf->domain_info.num_processors !=
+            processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
+            return -EINVAL;
+        }
+    }
 
-        /* for the first cpu, setup policy and do init work */
+    if (!domexist || hw_all) {
         policy = xmalloc(struct cpufreq_policy);
-        if (!policy) {
-            list_del(&cpufreq_dom->node);
-            xfree(cpufreq_dom);
-            cpufreq_statistic_exit(cpu);
-            return -ENOMEM;
-        }
+        if (!policy)
+            ret = -ENOMEM;
+
         memset(policy, 0, sizeof(struct cpufreq_policy));
         policy->cpu = cpu;
-        cpu_set(cpu, policy->cpus);
         cpufreq_cpu_policy[cpu] = policy;
 
         ret = cpufreq_driver->init(policy);
-        if (ret)
-            goto err1;
+        if (ret) {
+            xfree(policy);
+            return ret;
+        }
         printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+    } else {
+        firstcpu = first_cpu(cpufreq_dom->map);
+        policy = cpufreq_cpu_policy[firstcpu];
+
+        cpufreq_cpu_policy[cpu] = policy;
+        printk(KERN_EMERG"adding CPU %u\n", cpu);
     }
 
-    /*
-     * After get full cpumap of the coordination domain,
-     * we can safely start gov here.
-     */
-    if (cpus_weight(cpufreq_dom->map) ==
-        perf->domain_info.num_processors) {
+    cpu_set(cpu, policy->cpus);
+    cpu_set(cpu, cpufreq_dom->map);
+
+    ret = cpufreq_statistic_init(cpu);
+    if (ret)
+        goto err1;
+
+    if (hw_all ||
+        (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) {
         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
         policy->governor = NULL;
 
@@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu)
     return 0;
 
 err2:
-    cpufreq_driver->exit(policy);
+    cpufreq_statistic_exit(cpu);
 err1:
-    for_each_cpu_mask(j, cpufreq_dom->map) {
-        cpufreq_cpu_policy[j] = NULL;
-        cpufreq_statistic_exit(j);
+    cpufreq_cpu_policy[cpu] = NULL;
+    cpu_clear(cpu, policy->cpus);
+    cpu_clear(cpu, cpufreq_dom->map);
+
+    if (cpus_empty(policy->cpus)) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
+
+    if (cpus_empty(cpufreq_dom->map)) {
+        list_del(&cpufreq_dom->node);
+        xfree(cpufreq_dom);
     }
 
-    list_del(&cpufreq_dom->node);
-    xfree(cpufreq_dom);
-    xfree(policy);
     return ret;
 }
 
 int cpufreq_del_cpu(unsigned int cpu)
 {
     unsigned int dom, domexist = 0;
+    unsigned int hw_all = 0;
     struct list_head *pos;
     struct cpufreq_dom *cpufreq_dom = NULL;
     struct cpufreq_policy *policy;
@@ -270,6 +272,9 @@ int cpufreq_del_cpu(unsigned int cpu)
     if (!cpufreq_cpu_policy[cpu])
         return 0;
 
+    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+        hw_all = 1;
+
     dom = perf->domain_info.domain;
     policy = cpufreq_cpu_policy[cpu];
 
@@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu)
     if (!domexist)
         return -EINVAL;
 
-    /* for the first cpu of the domain, stop gov */
-    if (cpus_weight(cpufreq_dom->map) ==
-        perf->domain_info.num_processors)
+    /* for HW_ALL, stop gov for each core of the _PSD domain */
+    /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
+    if (hw_all ||
+        (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors))
         __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
+    cpufreq_statistic_exit(cpu);
     cpufreq_cpu_policy[cpu] = NULL;
     cpu_clear(cpu, policy->cpus);
     cpu_clear(cpu, cpufreq_dom->map);
-    cpufreq_statistic_exit(cpu);
+
+    if (cpus_empty(policy->cpus)) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
 
     /* for the last cpu of the domain, clean room */
     /* It's safe here to free freq_table, drv_data and policy */
-    if (!cpus_weight(cpufreq_dom->map)) {
-        cpufreq_driver->exit(policy);
+    if (cpus_empty(cpufreq_dom->map)) {
         list_del(&cpufreq_dom->node);
         xfree(cpufreq_dom);
-        xfree(policy);
     }
 
     printk(KERN_EMERG"deleting CPU %u\n", cpu);
index 74dd74ea0e8b12991b1115b5a89a9b2b3c263595..a4ff4f984884709be1306bf939e2a0d9f453a90f 100644 (file)
@@ -190,6 +190,12 @@ static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
         (void *)dbs_info, dbs_info->cpu);
 
     set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+
+    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
+            == CPUFREQ_SHARED_TYPE_HW )
+    {
+        dbs_info->stoppable = 1;
+    }
 }
 
 static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -337,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(void)
     cpufreq_unregister_governor(&cpufreq_gov_dbs);
 }
 __exitcall(cpufreq_gov_dbs_exit);
+
+void cpufreq_dbs_timer_suspend(void)
+{
+    int cpu;
+
+    cpu = smp_processor_id();
+
+    if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+    {
+        stop_timer( &dbs_timer[cpu] );
+    }
+}
+
+void cpufreq_dbs_timer_resume(void)
+{
+    int cpu;
+    struct timer* t;
+    s_time_t now;
+
+    cpu = smp_processor_id();
+
+    if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+    {
+        now = NOW();
+        t = &dbs_timer[cpu];
+        if (t->expires <= now)
+        {
+            t->function(t->data);
+        }
+        else
+        {
+            set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate));
+        }
+    }
+}
index 8423664efe17ab5392c5c055a524940f101c801f..2f24c4fed6402abe6903af653fcd1e8bdc9179cb 100644 (file)
@@ -221,6 +221,7 @@ struct cpu_dbs_info_s {
     struct cpufreq_frequency_table *freq_table;
     int cpu;
     unsigned int enable:1;
+    unsigned int stoppable:1;
 };
 
 int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
@@ -232,4 +233,7 @@ int write_ondemand_sampling_rate(unsigned int sampling_rate);
 int write_ondemand_up_threshold(unsigned int up_threshold);
 
 int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
+
+void cpufreq_dbs_timer_suspend(void);
+void cpufreq_dbs_timer_resume(void);
 #endif /* __XEN_CPUFREQ_PM_H__ */